package com.xch.sboot.service.grab.impl;

import com.xch.sboot.service.common.FileService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

import javax.annotation.Resource;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * 消息抓取
 * @author xch
 * 2023/1/30 16:30
 */

@Slf4j
@Service
public class BingGrapService {

    @Resource
    private FileService fileService;

    private static final Pattern pattern = initializePattern();

    /**
     * 百度的新闻榜地址
     */
    private static final String BING_COM_URL = "https://cn.bing.com/";

    public void grapImage() {
        LocalDateTime now = LocalDateTime.now();
        log.info("grapImage begin {}", now);
        Map<String, String> imageMap = grap();
        if (imageMap.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> image : imageMap.entrySet()) {
            String imgaeName = image.getKey();
            String imgaeUrl = image.getValue();
            downImage(imgaeUrl, imgaeName);
            log.info("grapImage bing image:{}-{} is success!", imgaeName, imgaeUrl);
        }
        log.info("grapImage end {}", LocalDateTime.now());
    }

    private void downImage(final String imageUrl, final String imageName) {
        log.info("image url {}", imageUrl);
        Connection connection = Jsoup.connect(imageUrl);
        Connection.Response response = null;
        try {
            response = connection.method(Connection.Method.GET).ignoreContentType(true).timeout(10*1000).execute();
        } catch (IOException e) {
            e.printStackTrace();
        }
        assert response != null;
        BufferedInputStream bufferedInputStream = response.bodyStream();
        fileService.saveFile(bufferedInputStream, imageName);
    }

    private static Map<String, String> grap() {
        Map<String, String> imgaeMap = new HashMap<>(2);
        try {
            Document baiduHtml = Jsoup.connect(BING_COM_URL).get();
            Elements elements = baiduHtml.getElementsByClass("hp_top_cover");
            Element element = elements.get(0);
            String style = element.attr("style");
            List<String> urls = findUrl(style);
            if (CollectionUtils.isEmpty(urls)) {
                return imgaeMap;
            }
            String imageUrl = urls.get(0);
            Element headline = baiduHtml.getElementById("headline");
            assert headline != null;
            String imageName = headline.html();
            imgaeMap.put(imageName, imageUrl);
        } catch (Exception e) {
            log.error("grap error {}", e.getMessage());
            e.printStackTrace();
        }
        return imgaeMap;
    }

    private static List<String> findUrl(String style) {
        List<String> urls = new ArrayList<>();
        Matcher matcher = pattern.matcher(style);
        while (matcher.find()) {
            String urlStr = matcher.group();
            if (!urlStr.startsWith("http")) {
                urlStr = "http://" + urlStr;
            }
            urls.add(urlStr);
        }
        return urls;
    }

    private static Pattern initializePattern() {
        return Pattern.compile("\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
                "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
                "|mil|biz|info|mobi|name|aero|jobs|museum" +
                "|travel|[a-z]{2}))(:[\\d]{1,5})?" +
                "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
                "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
                "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
                "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
                "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
                "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
    }
}
